## Create all figures for supplementary information -----
library(tidyverse)
library(ggpubr)
library(readstata13)
library(cdlTools)
library(ggplot2)
library(mapproj)
#-------------------------------------------------------#
# SI Figure 1: Distribution of logged total value of yearly transfers -----
#-------------------------------------------------------#
load('data/city-aggregate.RData')

agency <- agency %>%
  mutate(aidPerCap = sumallLESOaidBG/Total_Population,
         aidPerCap_log = log(1+sumallLESOaidBG)/Total_Population) 

ggplot(data = agency[agency$aidPerCap_log>0,], aes(x=aidPerCap_log)) + 
  geom_density(color='black', fill='grey') + theme_bw() +
  labs(x='Total Logged Aid Per Capita',
       y = 'Density')  
ggsave("figures/DistOfAidPerCap.pdf")

#-------------------------------------------------------#
# SI Figure 2: logged aid and crime rates -----
#-------------------------------------------------------#
ggplot(data = agency, aes(x=aidPerCap_log, y = allcrime_rate)) + geom_point() +
  geom_smooth() + ylim(c(0,75000)) + theme_bw()+xlab("Total Logged Aid Per Capita")+
  ylab("Total Crime Rate Per 100,000 Population")
ggsave("figures/crimeAidCoplot.pdf")

rm(list = ls())

#-------------------------------------------------------#
# SI Figures 4-7: maps of discrepancies across BG and HPBM ------
#-------------------------------------------------------#
#Reading in the data
bg <- read.dta13("raw-data/militarization.dta")%>%
  mutate(fips = str_pad(fips,width=5,side="left",pad=0))%>%
  filter(year>2005) %>%
  rename(sum_valuebg = total_raw) %>%
  mutate(sum_itemsbg = exp(log_sum_items),
         sum_itemsbg = if_else(sum_itemsbg==1, 0, sum_itemsbg))

harris <- read.dta13("raw-data/HBPM-AEJ-fips-unlocked.dta")%>%
  mutate(fips = str_pad(fips,width=5,side="left",pad=0),
         sum_valueharris = exp(log_sum_value),
         sum_valueharris = if_else(sum_valueharris==1, 0, sum_valueharris),
         sum_itemsharris = exp(log_sum_items),
         sum_itemsharris = if_else(sum_itemsharris==1, 0, sum_itemsharris))

#Merging into single data set based on fips and year
data <- left_join(bg, harris,by=c("fips"="fips","year"="year")) %>%
  mutate(fipsst = substr(fips, start = 1, stop = 2),
         state = fips(fipsst, to = "Name"),
         sumdifferenceyear = abs(sum_valueharris-sum_valuebg), #94% of cases are when Harris numbers bigger than BG
         sumdifferenceyearnoabs = sum_valueharris-sum_valuebg)%>%
  select(sumdifferenceyear,sumdifferenceyearnoabs,county,fips,fipsst,state,year,
         sum_valuebg,sum_valueharris,sum_itemsbg,sum_itemsharris) 

county.map <- data %>%
  group_by(county,state)%>%
  summarise(sumdifference=sum(sumdifferenceyear),
            sumdifferencenoabs=sum(sumdifferenceyearnoabs)) %>%
  ungroup()

county.map <- county.map %>%
  mutate_at(.vars = "county", 
            .funs = gsub,
            pattern = " County| Parish| city",
            replacement = "")%>%
  mutate_at(.vars = "state", 
            .funs = gsub,
            pattern = "Deleware",
            replacement = "Delaware")%>%
  mutate_at(.vars = "county", 
            .funs = gsub,
            pattern = "'",
            replacement = "")%>%
  mutate(subregion=tolower(county),region=tolower(state)) %>%
  na.omit(sumdifference)

counties <- map_data("county") 

counties <- counties %>%
  mutate_at(.vars = "subregion", 
            .funs = gsub,
            pattern = "st ",
            replacement = "st. ") %>%
  mutate_at(.vars ="subregion", .funs=funs(recode(., "de kalb" = "dekalb","de soto"= "desoto","du page"="dupage",
                                                  "obrien"="o'brien",
                                                  "yellowstone national"="yellowstone","debaca"="de baca",
                                                  "mckean"="mc kean")))%>%
  mutate_at(.vars = "subregion", 
            .funs = gsub,
            pattern = "ste ",
            replacement = "ste. ") %>%
  mutate_at(.vars = "subregion", 
            .funs = gsub,
            pattern = "east.",
            replacement = "east") %>%
  mutate_at(.vars = "subregion", 
            .funs = gsub,
            pattern = "west.",
            replacement = "west")%>%
  mutate_at(.vars = "subregion", 
            .funs = gsub,
            pattern = "city",
            replacement = "")

ditch_the_axes <- theme(
  axis.text = element_blank(),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  panel.border = element_blank(),
  panel.grid = element_blank(),
  axis.title = element_blank()
) 

dataandlatlong <- full_join(county.map,counties) %>% 
  mutate_all(funs(ifelse(is.na(.), 0, .)))

cumsumabs <- ggplot(dataandlatlong, aes(x=long, y=lat, group=group, fill=cut_number(sumdifference, 5))) +
  theme_bw()+
  geom_polygon()+coord_map()+ditch_the_axes + 
  scale_fill_manual(values = c("gray45","lightgoldenrod","orange","tomato","firebrick4"),
                    name= "Absolute Difference between Harris \n and BG Sum Awards Given",
                    labels=c("0","1 to 15,199","15,200 to 73,999","74,000 to 391,999","392,000 to 199,000,000")) + 
  theme(legend.position = c(0.52, 0.65),
        legend.key.size = unit(0.01,"line"),legend.title=element_text(size=6),
        legend.text=element_text(size=5),
        plot.margin=unit(c(0, -8, 0, -.2),"cm")) # top, then right, bottom and left
cumsumabs
ggsave("figures/bg_harris_map_comparison.pdf",width=11,height=7)

#not absolute numbers
cumsumnoabs <- ggplot(dataandlatlong, aes(x=long, y=lat, group=group, fill=cut_number(sumdifferencenoabs, 5))) +
  theme_bw()+
  geom_polygon()+coord_map()+ditch_the_axes + 
  scale_fill_manual(values = c("gray45","lightgoldenrod","orange","tomato","firebrick4"),
                    name= "Difference between Harris \n and BG Sum Awards Given",
                    labels=c("-198,000,000 to -3,909","-3,910 to -1","0 to 14,999",
                             "15,000 to 86,499","86,500 to 63,400,000")) + 
  theme(legend.position = c(0.52, 0.65),
        legend.key.size = unit(0.01,"line"),legend.title=element_text(size=6),
        legend.text=element_text(size=5),
        plot.margin=unit(c(0, -8, 0, -.2),"cm")) # top, then right, bottom and left
cumsumnoabs
ggsave("figures/bg_harris_map_comparisonNOABS.pdf",width=11,height=7)

#use one year instead of cumulative sum
county.mapavg <- data %>%
  group_by(county,state)%>%
  summarise(sumdifference=mean(sumdifferenceyear),
            sumdifferencenoabs=mean(sumdifferenceyearnoabs)) %>%
  ungroup()

county.mapavg <- county.mapavg %>%
  mutate_at(.vars = "county", 
            .funs = gsub,
            pattern = " County| Parish| city",
            replacement = "")%>%
  mutate_at(.vars = "state", 
            .funs = gsub,
            pattern = "Deleware",
            replacement = "Delaware")%>%
  mutate_at(.vars = "county", 
            .funs = gsub,
            pattern = "'",
            replacement = "")%>%
  mutate(subregion=tolower(county),region=tolower(state)) %>%
  na.omit(sumdifferenceyear)

dataandlatlongavg <- full_join(county.mapavg,counties)%>%
  mutate_all(funs(ifelse(is.na(.), 0, .)))

#make maps
avgdiffabs <- ggplot(dataandlatlongavg, aes(x=long, y=lat, group=group, fill=cut_number(sumdifference, 5))) +
  theme_bw()+
  geom_polygon()+coord_map()+ditch_the_axes + 
  scale_fill_manual(values = c("gray45","lightgoldenrod","orange","tomato","firebrick4"),
                    name= "Average Absolute Difference between \n Harris and BG Sum Awards Given",
                    labels=c("0 to 0.142","0.143 to 2,169","2,170 to 10,599","10,600 to 56,099",
                             "56,100 to 28,400,000")) + 
  theme(legend.position = c(0.52, 0.65),
        legend.key.size = unit(0.01,"line"),legend.title=element_text(size=6),
        legend.text=element_text(size=5),
        plot.margin=unit(c(0, -8, 0, -.2),"cm")) # top, then right, bottom and left
avgdiffabs
ggsave("figures/bg_harris_map_comparisonaverage.pdf",width=11,height=7)

#not absolute numbers
avgdiffnoabs <- ggplot(dataandlatlongavg, aes(x=long, y=lat, group=group, fill=cut_number(sumdifferencenoabs, 5))) +
  theme_bw()+
  geom_polygon()+coord_map()+ditch_the_axes + 
  scale_fill_manual(values = c("gray45","lightgoldenrod","orange","tomato","firebrick4"),
                    name= "Average Difference between Harris \n and BG Sum Awards Given",
                    labels=c("-28,200,000 to -560","-559 to -1","0 to 2,149","2,150 to 12,399",
                             "12,400 to 9,060,000"))+
  theme(legend.position = c(0.52, 0.65),
        legend.key.size = unit(0.01,"line"),legend.title=element_text(size=6),
        legend.text=element_text(size=5),
        plot.margin=unit(c(0, -8, 0, -.2),"cm")) # top, then right, bottom and left
avgdiffnoabs
ggsave("figures/bg_harris_map_comparisonNOABSaverage.pdf",width=11,height=7)

rm(list=ls())

#---------------------------------------------------------#
# SI Figure 8: Item value standard deviations in LESO data 
#---------------------------------------------------------#
#See file 15-create_figures_main.R

#---------------------------------------------------------#
# SI Figure 9: Visualization of BG first stage coefficients 
#---------------------------------------------------------#
load('data/results-BG.RData')

## BG figure first
BG_first <- unnest(first_stage_results, cols = c('first_stage'))
colnames(BG_first) <- c("term","Estimate","cluster_se","t.value","Pr.t")
BG_first <- BG_first %>%
  filter(term=='milex_iv') %>%
  mutate(group = c("BG County","Rep Agency","Rep County"))

# so figure will be in order we want
BG_first$group <- factor(BG_first$group, levels = c("BG County", "Rep County", "Rep Agency"))

# make CIs
BG_first$low95 <- BG_first$Estimate - (abs(qnorm(.025))*BG_first$cluster_se)
BG_first$up95 <- BG_first$Estimate + (abs(qnorm(.025))*BG_first$cluster_se)
BG_first$low99 <- BG_first$Estimate - (abs(qnorm(.005))*BG_first$cluster_se)
BG_first$up99 <- BG_first$Estimate + (abs(qnorm(.005))*BG_first$cluster_se)
 
# # make plots -------
BG_plot <- ggplot(BG_first, aes(x=group,y=Estimate, color=group)) +
  #facet_grid(ID~.) +
  geom_point() +
  geom_segment(aes(x=group, xend=group, y= low95, yend=up95),size=1) +
  geom_segment(aes(x=group, xend=group, y= low99, yend=up99)) +
  geom_hline(yintercept=0,linetype="dashed",color="red") +
  ggtitle("BG First Stage Coefficients") +
  xlab("") +
  theme_bw() +
  theme(legend.position = "none")
 
ggsave("figures/first-stage-plot-BG.pdf")

#-----------------------------------------------------------#
# SI Figure 10: Visualization of HPBM first stage coefficients 
#-----------------------------------------------------------#
load('data/results-HPBM.RData')
HPBM_first <- unnest(HPBM_first_stage_results, cols = c('first_stage'))
colnames(HPBM_first) <- c("term","Estimate","cluster_se","t.value","Pr.t")

iv_rows <- grep('zH',HPBM_first$term)
HPBM_first <- HPBM_first[iv_rows,] %>%
  mutate(group = c(rep("Rep Agency\nItems",4),
                   rep("Rep Agency\nValue",4),
                   rep("Rep County\nItems",4),
                   rep("Rep County\nValue",4),
                   rep("HPBM County\nItems",4),
                   rep("HPBM County\nValue",4)))

# so figure will be in order we want
HPBM_first$group <- factor(HPBM_first$group, levels = c("HPBM County\nItems",
                                                    "HPBM County\nValue",
                                                    "Rep County\nItems",
                                                    "Rep County\nValue",
                                                    "Rep Agency\nItems",
                                                    "Rep Agency\nValue"))

# make CIs
HPBM_first$low95 <- HPBM_first$Estimate - (abs(qnorm(.025))*HPBM_first$cluster_se)
HPBM_first$up95 <- HPBM_first$Estimate + (abs(qnorm(.025))*HPBM_first$cluster_se)
HPBM_first$low99 <- HPBM_first$Estimate - (abs(qnorm(.005))*HPBM_first$cluster_se)
HPBM_first$up99 <- HPBM_first$Estimate + (abs(qnorm(.005))*HPBM_first$cluster_se)

# Nicer instrument name
HPBM_first <- HPBM_first %>%
  mutate(iv_name = recode(term,
         zHUdI1 = 'Dist 1',
         zHUdIland = 'Land Area',
         zHUdI6 = 'Dist 6',
         zHUdIhidta = 'HIDTA',
         zHUd1 = 'Dist 1',
         zHUd6 = 'Dist 6',
         zHUdland = 'Land Area',
         zHUdhitda = 'HIDTA',
         zHUdI1_lag = 'Dist 1',
         zHUdIland_lag = 'Land Area',
         zHUdI6_lag = 'Dist 6',
         zHUdIhidta_lag = 'HIDTA',
         zHUd1_lag = 'Dist 1',
         zHUd6_lag = 'Dist 6',
         zHUdland_lag = 'Land Area',
         zHUdhitda_lag = 'HIDTA'))


## make plots -------
# loop through 4 instruments
instruments <- c('Dist 1', 'Dist 6', 'Land Area', 'HIDTA')
titles <- c('Distance 1', 'Distance 6', 'Land Area', 'HIDTA')
plot_list <- list()
for(i in 1:4){
  p <- ggplot(HPBM_first %>% filter(iv_name==instruments[i]), aes(x=group,y=Estimate, color=group)) +
    #facet_grid(ID~.) +
    geom_point() +
    geom_segment(aes(x=group, xend=group, y= low95, yend=up95),size=1) +
    geom_segment(aes(x=group, xend=group, y= low99, yend=up99)) +
    geom_hline(yintercept=0,linetype="dashed",color="red") +
    ggtitle(paste0("First Stage coefficients instruments: ",titles[i])) +
    xlab("") +
    theme_bw() +
    theme(legend.position = "none",
          axis.text.x = element_text(angle=45, hjust=1))
  plot_list[[i]] <- p
}

# save plots
for(i in 1:4){
  file_name <- paste0('figures/first-stage-plot-HPBM-',i,'.pdf')
  pdf(file_name)
  print(plot_list[[i]])
  dev.off()
}

# or do as facets???? ------
HPBM_facet <- ggplot(HPBM_first, aes(x=group,y=Estimate)) +
  facet_grid(iv_name~.,scales='free') +
  geom_hline(yintercept=0,linetype="dashed",color="red") +
  geom_point() +
  geom_segment(aes(x=group, xend=group, y= low95, yend=up95),size=1) +
  geom_segment(aes(x=group, xend=group, y= low99, yend=up99)) +
  ggtitle("HPBM First Stage Coefficients") +
  xlab("") +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(angle=45, hjust=1))

ggsave("figures/first-stage-plot-HPBM-facet.pdf")

# pdf(file='figures/first-stage-combined.pdf',width=12)
# ggarrange(BG_plot, HPBM_facet)
# dev.off()
